In [ ]:
import pandas as pd
import numpy as np
import plotly.express as px
# Load the Spotify 2023 dataset and force `streams` to a numeric dtype;
# entries that cannot be parsed as numbers become NaN (errors='coerce').
data = pd.read_csv("spotify-2023.csv")
data = data.assign(streams=pd.to_numeric(data['streams'], errors='coerce'))
In [ ]:
# Most-streamed track for each release year.
# `streams` can contain NaN (unparseable values are coerced when the data is loaded).
# A year whose rows are all NaN has no valid idxmax label, and passing that to `.loc`
# fails, so rows without a stream count are dropped before ranking.
valid_streams = data.dropna(subset=['streams'])
most_streamed = valid_streams.loc[
    valid_streams.groupby('released_year')['streams'].idxmax()
]

clean_data = most_streamed[['track_name', 'artist(s)_name', 'released_year', 'streams']]
fig = px.bar(
    clean_data,
    x='released_year',
    y='streams',
    # Show which track/artist each bar stands for when hovering.
    hover_data=['track_name', 'artist(s)_name'],
    title='Most streamed song by release year',
)

fig.show(renderer='notebook')
In [ ]:
# Top 3 tracks by stream count for every release year from 2010 onwards.
# Sorting and then taking groupby.head(3) replaces groupby.apply(nlargest): calling apply
# on a frame that still contains the grouping column is deprecated in pandas 2.2, and
# newer versions leave `released_year` out of the result, which would break the column
# selection below.
# NaN stream counts sort last within a year (sort_values default), so they can only
# show up when a year has fewer than three rows with a valid count.
top_songs = (
    data
    .sort_values(['released_year', 'streams'], ascending=[True, False])
    .groupby('released_year')
    .head(3)
    .reset_index(drop=True)
)
top_songs = top_songs[['track_name', 'artist(s)_name', 'released_year', 'streams']]
top_songs = top_songs.query("released_year >= 2010")

fig = px.bar(
    top_songs,
    x='released_year',
    y='streams',
    # A list (not a set) keeps the hover fields in a fixed order.
    hover_data=['track_name', 'artist(s)_name'],
    color='streams',
    color_continuous_scale='Thermal',
    title='Top 3 streamed songs by year',
)

fig.show(renderer='notebook')
In [ ]:
# Total stream count per musical key.
# `song_keys` keeps only the columns needed here; `group` ends up with one row per
# distinct `key` value and the summed `streams` for that key.
song_keys = data.loc[:, ['released_year', 'key', 'streams']]
group = song_keys.groupby('key', as_index=False)['streams'].sum()
def tomillions(x):
    """Return ``x`` divided by one million.

    Works for anything that supports true division by a float: plain numbers
    as well as array-like objects that divide element-wise.
    """
    one_million = 1e6
    return x / one_million